
#THIS FILE PUTS TOGETHER RECALLS AND DATES

# Locate the downloaded recall-status snapshots in the working directory.
# The pattern requires the "Terugroep_actie_status_<date>.RData" form so that
# unrelated files sharing the prefix are not picked up and later passed to
# load().
filenames <- list.files(pattern = "Terugroep_actie_status_.*\\.RData$")
if (length(filenames) == 0) {
  stop("No 'Terugroep_actie_status_<date>.RData' files found in ", getwd(),
       call. = FALSE)
}

# Extract the download date embedded in each file name. The dot is escaped so
# it only matches the literal separator in front of the extension.
filedate <- str_match(filenames, "Terugroep_actie_status_(.*?)\\.RData")[, 2]

# Replace Dutch month names with their English equivalents before handing the
# strings to dmy(). april, september, november and december are spelled the
# same in both languages, so they need no entry. Substitutions are applied in
# the order listed.
month_map <- c(
  "[Jj]anuari"  = "january",
  "[Ff]ebruari" = "february",
  "[Mm]aart"    = "march",
  "[Mm]ei"      = "may",
  "[Jj]uni"     = "june",
  "[Jj]uli"     = "july",
  "[Aa]ugustus" = "august",
  "[Oo]ktober"  = "october"
)
for (nl in names(month_map)) {
  filedate <- gsub(nl, month_map[[nl]], filedate)
}

# Parse the dates; an unparseable date would otherwise become NA, sort last
# and silently break every date-based step that follows, so fail loudly.
parsed <- as.Date(dmy(filedate))
if (anyNA(parsed)) {
  stop("Could not parse a download date from: ",
       paste(filenames[is.na(parsed)], collapse = ", "),
       call. = FALSE)
}

# One row per snapshot: file path, download date, and chronological rank
Files <- data.frame(path = filenames, date = parsed, stringsAsFactors = FALSE)
Files <- Files[order(Files$date), ]
# Files is already sorted, so the rank is simply 1..n
Files$order <- seq_len(nrow(Files))
rm(filedate, filenames, month_map, nl, parsed)

# Start from the base snapshot: row 1 of Files is the earliest download.
# load() is expected to bring a data frame called `x` into the workspace.
d <- 1
load(Files$path[d])

# Normalise the column names in one pass: strip dots, then lower-case
names(x) <- tolower(gsub(".", "", names(x), fixed = TRUE))

# Store factor columns as plain character vectors
is_fct <- vapply(x, is.factor, logical(1))
x[is_fct] <- lapply(x[is_fct], as.character)
rm(is_fct)

# From here on the base data set lives under the name Recall
Recall <- x
rm(x)
gc()

# Set up the tracking columns that the merge loop fills in.
# The three event dates start out as missing Date values.
na_date <- as.Date(NA)
Recall[["recall_new"]] <- na_date
Recall[["recall_fixed"]] <- na_date
Recall[["recall_out"]] <- na_date
rm(na_date)

# Counter incremented by the merge loop when a record with a recall_out date
# shows up again
Recall[["missing"]] <- 0

# First and last observed status both start as the status in the base file
Recall[["status_first"]] <- Recall[["codestatus"]]
Recall[["status_last"]] <- Recall[["codestatus"]]

# The raw status columns are no longer needed
Recall[["codestatus"]] <- NULL
Recall[["status"]] <- NULL




# Fold every later snapshot into Recall, one file at a time, in chronological
# order. seq_len(n)[-1] yields 2..n and, unlike 2:n, is empty when there is
# only one snapshot, so the loop is skipped instead of running over c(2, 1).
for (j in seq_len(nrow(Files))[-1]) {

  # Counters for the progress report printed at the end of the iteration:
  # [1] new recalls, [2] reappeared ("missing"), [3] fixed, [4] out
  stats <- c(NA, NA, NA, NA)

  # Load the snapshot to add (expected to provide a data frame `x`) and stamp
  # every row with the snapshot's download date
  load(Files$path[j])
  x$date <- Files$date[j]

  # Drop dots from variable names, then make them all lower case
  names(x) <- tolower(gsub("\\.", "", names(x)))
  # Drop the descriptive status column. This is done after normalising the
  # names so the column is removed whatever its original capitalisation,
  # consistent with how the main file is handled.
  x$status <- NULL
  # Transform factors into character
  is_fct <- vapply(x, is.factor, logical(1))
  x[is_fct] <- lapply(x[is_fct], as.character)
  rm(is_fct)

  # Full outer merge of the main file with the new snapshot: rows absent from
  # the snapshot get NA in codestatus/date, rows new in the snapshot get NA in
  # the existing Recall columns
  Recall <- merge(x = Recall, y = x,
                  by = c("kenteken", "referentiecoderdw"), all = TRUE)
  rm(x)
  gc()

  # ADD VARIABLE recall_new
  # Date on which a kenteken/referentiecode pair appears in the dataset for
  # the first time. It can be a new recall for an existing car (imported or
  # already in NL) OR a fixed recall for a used car imported into NL.
  # A pair that was already in the dataset (status_first not NA) but
  # disappeared for a while (i.e. exported) is not considered new.
  k <- is.na(Recall$status_last) & !is.na(Recall$codestatus) &
    is.na(Recall$status_first)
  Recall$recall_new[k] <- Recall$date[k]
  Recall$status_first[k] <- Recall$codestatus[k]
  stats[1] <- sum(k)
  rm(k)

  # ADD VARIABLE missing
  # If a record that already has a recall_out date reappears in this
  # snapshot, clear recall_out and add 1 to missing
  k <- !is.na(Recall$recall_out) & !is.na(Recall$date)
  Recall$recall_out[k] <- NA
  Recall$missing[k] <- Recall$missing[k] + 1
  stats[2] <- sum(k)
  rm(k)

  # ADD VARIABLE recall_fixed
  # Date on which an open recall ("O") is recorded as solved ("P"); a later
  # O -> P transition overwrites the date, so the last one is kept.
  # %in% never returns NA, so rows with a missing status are simply FALSE.
  k <- Recall$status_last %in% "O" & Recall$codestatus %in% "P"
  Recall$recall_fixed[k] <- Recall$date[k]
  stats[3] <- sum(k)
  rm(k)

  # ADD VARIABLE recall_out
  # Date on which a car disappears from the database (probably because it was
  # scrapped or sold abroad): known before, absent from this snapshot, and
  # not yet marked as out
  k <- !is.na(Recall$status_last) & is.na(Recall$codestatus) &
    is.na(Recall$recall_out)
  Recall$recall_out[k] <- Files$date[j]
  stats[4] <- sum(k)
  rm(k)

  # UPDATE status_last
  # Replace the previous last status with the new non-missing status,
  # UNLESS the recall was marked as solved and is now reopened (some fixed
  # recalls are re-opened and re-solved within a short period of time).
  # Then drop the status variable that came from the snapshot.
  reopened <- Recall$status_last %in% "P" & Recall$codestatus %in% "O"
  k <- !is.na(Recall$codestatus) & !reopened
  Recall$status_last[k] <- Recall$codestatus[k]
  Recall$codestatus <- NULL
  rm(k, reopened)
  # Rows just added by the merge have NA in missing; code them as 0
  Recall$missing[is.na(Recall$missing)] <- 0
  # Drop the snapshot date
  Recall$date <- NULL

  print(paste0("ITERATION ", j, ", ", Files$date[j]))
  print(paste0("New recalls ", stats[1]))
  print(paste0("Fixed ", stats[3]))
  print(paste0("Out ", stats[4]))
  print(paste0("Missing ", stats[2]))

}

# Keep the merged result under the name Recallw and write it to disk
Recallw <- Recall
save(list = "Recallw", file = "Recall_week1.RData")

